import os
import tarfile
import urllib
import pandas as pd

import matplotlib.pyplot as plt

# Raise pandas' display limits so printed DataFrames are not truncated.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 100
pd.options.display.width = 1000

# Local directory (relative to the working directory) holding the dataset files.
HOUSING_PATH = os.path.join("datasets", "housing")

# Base URL of the remote repository and the full URL of the dataset archive.
DOWNLOAD_ROOT = "https://raw.githubusercontent.com/ageron/handson-ml2/master/"
HOUSING_URL = DOWNLOAD_ROOT + "datasets/housing/housing.tgz"

def fetch_housing_data(housing_url=HOUSING_URL, housing_path=HOUSING_PATH):
  """Download the housing archive and unpack it into ``housing_path``.

  The directory is created if it does not exist. The archive is saved there
  as ``housing.tgz`` and then extracted in place.

  Args:
    housing_url: URL of the ``.tgz`` archive to download.
    housing_path: Directory that receives the archive and its contents.

  Returns:
    None.

  Raises:
    urllib.error.URLError: If the download fails.
    tarfile.TarError: If the downloaded file cannot be read as a tar archive.
    OSError: If the directory or files cannot be written.
  """
  # A bare `import urllib` does not guarantee that the `request` submodule is
  # loaded, so import it explicitly before using it.
  import urllib.request

  os.makedirs(housing_path, exist_ok=True)
  tgz_path = os.path.join(housing_path, "housing.tgz")
  urllib.request.urlretrieve(housing_url, tgz_path)

  # The context manager closes the archive even if extraction raises.
  with tarfile.open(tgz_path) as housing_tgz:
    if hasattr(tarfile, "data_filter"):
      # The archive comes from the network: where the interpreter supports
      # extraction filters, refuse members that would escape `housing_path`.
      housing_tgz.extractall(path=housing_path, filter="data")
    else:
      housing_tgz.extractall(path=housing_path)

# fetch_housing_data()

def load_housing_date(housing_path=HOUSING_PATH):
  """Read ``housing.csv`` from ``housing_path`` into a pandas DataFrame.

  Args:
    housing_path: Directory that contains ``housing.csv``.

  Returns:
    The DataFrame produced by ``pd.read_csv`` for that file.
  """
  return pd.read_csv(os.path.join(housing_path, "housing.csv"))

# data = load_housing_date()
# print(data.head())

def test_hist():
  """Download and unpack the housing dataset via ``fetch_housing_data()``.

  NOTE(review): despite the name, no histogram is produced here. Presumably
  the intent was to load the CSV with ``load_housing_date()`` and plot it;
  confirm before extending.
  """
  # fetch_housing_data() has no return value, so there is nothing to bind.
  fetch_housing_data()




if __name__ == "__main__":
  # Expects housing.csv to be present under HOUSING_PATH already; call
  # fetch_housing_data() once beforehand to download and unpack it.
  data = load_housing_date()
  print(data.head())
  # DataFrame.info() writes its report to stdout itself and returns None, so
  # wrapping it in print() would add a stray "None" line after the report.
  data.info()
  print("===========ocean_proximity -value-counts===============>")
  print(data["ocean_proximity"].value_counts())
  print("===========describe========================")
  print(data.describe())
